library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.4.1 but the current version is
## 4.4.2; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(pheatmap)
library(clusterProfiler)
## 
## clusterProfiler v4.14.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024,
## 5(6):100722
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(ggplot2)

expr_data <- read.delim(
  "GSE85241_dataset.csv.gz",
  header = TRUE,
  row.names = 1,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

pancreas <- CreateSeuratObject(
  expr_data,
  project = "MuraroPancreas",
  min.cells = 3,
  min.features = 500
)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Data is of class data.frame. Coercing to dgCMatrix.
pancreas$donor <- factor(sub("-.*", "", pancreas$orig.ident))

Data Overview

dim(expr_data)
## [1] 19140  3072
summary(pancreas$nFeature_RNA)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     508    3969    5037    5008    6098   12194
summary(pancreas$nCount_RNA)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    915.6  11738.3  18003.3  21984.4  27092.8 261046.3

Workflow

pancreas <- NormalizeData(pancreas)
## Normalizing layer: counts
pancreas <- FindVariableFeatures(pancreas, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
length(VariableFeatures(pancreas))
## [1] 2000
pancreas <- ScaleData(pancreas)
## Centering and scaling data matrix
pancreas <- RunPCA(pancreas, features = VariableFeatures(pancreas))
## PC_ 1 
## Positive:  IFITM3--chr11, ZFP36L1--chr14, REST--chr4, MYH9--chr22, S100A11--chr1, SOX4--chr6, NOTCH2--chr1, MYL12A--chr18, PMEPA1--chr20, LGALS3--chr14 
##     RBPMS--chr8, CAV2--chr7, YAP1--chr11, TMSB4X--chrX, EPS8--chr12, CLIC4--chr1, ANO6--chr12, PPIC--chr5, AHNAK--chr11, NFIB--chr9 
##     TPM1--chr15, MSN--chrX, ANXA2P2--chr9, CDC42EP1--chr22, CD44--chr11, RHOC--chr1, ANXA2--chr15, LITAF--chr16, OSMR--chr5, LAMC1--chr1 
## Negative:  SCG3--chr15, SYT7--chr11, ABCC8--chr11, ERO1LB--chr1, VGF--chr7, GAD2--chr10, MIR7-3HG--chr19, PLCXD3--chr5, LOC728392--chr17, KIF5C--chr2 
##     MLXIPL--chr7, G6PC2--chr2, STMN2--chr8, STX1A--chr7, PEG10--chr7, RGS4--chr1, CFC1--chr2, WNT4--chr1, SCD--chr10, RASD1--chr17 
##     ELAVL4--chr1, C14orf132--chr14, PRUNE2--chr9, SYT13--chr11, KCNK16--chr6, PCP4--chr21, CRYBA2--chr2, ATP2A3--chr17, SORL1--chr11, MEIS2--chr15 
## PC_ 2 
## Positive:  CD24--chrY, ELF3--chr1, TACSTD2--chr1, KRT8--chr12, KIAA1522--chr1, CFB--chr6, KRT18--chr12, ANXA4--chr2, KRT7--chr12, CLDN4--chr7 
##     SERPINA3--chr14, OCLN--chr5, PDZK1IP1--chr1, RASEF--chr9, CLDN1--chr3, TMC5--chr16, TC2N--chr14, SDC4--chr20, TMPRSS2--chr21, ABCC3--chr17 
##     ERBB3--chr12, CXADR--chr21, LAD1--chr1, LCN2--chr9, TM4SF1--chr3, GATM--chr15, CLMN--chr14, ATP10B--chr5, CLDN10--chr13, NR5A2--chr1 
## Negative:  PDGFRB--chr5, SPARC--chr5, MRC2--chr17, COL6A3--chr2, CDH11--chr16, COL5A2--chr2, NID1--chr1, COL5A1--chr9, COL5A3--chr19, LAMA4--chr6 
##     COL15A1--chr9, FBN1--chr15, COL1A2--chr7, LOXL2--chr8, COL6A2--chr21, COL3A1--chr2, BGN--chrX, LUM--chr12, SFRP2--chr4, LTBP2--chr14 
##     ITGA11--chr15, SRPX2--chrX, THBS2--chr6, PRRX1--chr1, COL12A1--chr6, WNT5A--chr3, VCAN--chr5, LHFP--chr13, CYGB--chr17, EDNRA--chr4 
## PC_ 3 
## Positive:  CPA2--chr7, PRSS3P2--chr7, PRSS1--chr7, PLA2G1B--chr12, PNLIP--chr10, CTRC--chr1, KLK1--chr19, CPA1--chr7, GSTA2--chr6, CTRB1--chr16 
##     PRSS3--chr9, CTRB2--chr16, BCAT1--chr12, PNLIPRP2--chr10, REG1B--chr2, CEL--chr9, CPB1--chr3, ALB--chr4, CELA2A--chr1, AOX1--chr2 
##     SPINK1--chr5, PNLIPRP1--chr10, CELA3A--chr1, ALDOB--chr9, FAM129A--chr1, CBS--chr21, GSTA1--chr6, REG1A--chr2, MGST1--chr12, CTRL--chr16 
## Negative:  CFTR--chr7, ONECUT2--chr18, DCDC2--chr6, ALDH1A3--chr15, VTCN1--chr1, TINAGL1--chr1, AQP1--chr7, KRT19--chr17, SPP1--chr4, DEFB1--chr8 
##     KRT23--chr17, PDGFD--chr11, APCDD1--chr18, SLC4A4--chr4, PPARGC1A--chr4, PKHD1--chr6, THSD4--chr15, HSD17B2--chr16, SERPINA5--chr14, FUT3--chr19 
##     S100A14--chr1, BICC1--chr10, CRIM1--chr2, TFPI2--chr7, SCNN1A--chr12, IGFBP7--chr4, SLC3A1--chr2, CEACAM7--chr19, SLC1A1--chr9, HKDC1--chr10 
## PC_ 4 
## Positive:  CALD1--chr7, NOTCH3--chr19, COL5A1--chr9, CDH11--chr16, SFRP2--chr4, FAT1--chr4, COL5A2--chr2, COL6A1--chr21, LTBP2--chr14, ITGA11--chr15 
##     THBS2--chr6, PDGFRB--chr5, COL3A1--chr2, COL5A3--chr19, COL1A1--chr17, COL6A3--chr2, LUM--chr12, TNFAIP6--chr2, FN1--chr2, PRRX1--chr1 
##     LAMA2--chr6, COL12A1--chr6, ADAMTS2--chr5, SPON2--chr4, FMOD--chr1, EDNRA--chr4, COL6A2--chr21, GFPT2--chr5, PDE3A--chr12, COL1A2--chr7 
## Negative:  ABI3--chr17, PECAM1--chr17, SRGN--chr10, LAPTM5--chr1, GIMAP4--chr7, GMFG--chr19, ARHGDIB--chr12, TYROBP--chr19, CD53--chr1, KDR--chr4 
##     FLT1--chr13, ACVRL1--chr12, CLEC2B--chr12, CD93--chr20, ESAM--chr11, SOX18--chr20, EXOC3L2--chr19, ARHGAP31--chr3, FCER1G--chr1, LCP1--chr13 
##     TNFRSF1B--chr1, CALCRL--chr2, ELTD1--chr1, PLVAP--chr19, TIE1--chr1, CSF2RB--chr22, GIMAP8--chr7, GIMAP7--chr7, PRDM1--chr6, ERG--chr21 
## PC_ 5 
## Positive:  FLT1--chr13, ESAM--chr11, KDR--chr4, EXOC3L2--chr19, SOX18--chr20, ELTD1--chr1, PLVAP--chr19, CD93--chr20, CALCRL--chr2, GPR4--chr19 
##     PODXL--chr7, NOTCH4--chr6, LOC100505495--chr19, EMCN--chr4, RHOJ--chr14, ACVRL1--chr12, MYCT1--chr6, ECSCR--chr5, TIE1--chr1, CLEC14A--chr14 
##     PTPRB--chr12, MMRN2--chr10, ERG--chr21, GPR116--chr6, S1PR1--chr1, ROBO4--chr11, PASK--chr2, FLT4--chr5, COL13A1--chr10, CDH5--chr16 
## Negative:  TYROBP--chr19, CD53--chr1, LCP1--chr13, FCER1G--chr1, ITGB2--chr21, MS4A7--chr11, LAPTM5--chr1, C1QB--chr1, MYO1F--chr19, C1QC--chr1 
##     PTPRC--chr1, PLA2G7--chr6, RUNX3--chr1, RGS1--chr1, ACP5--chr19, ALOX5AP--chr13, CCR1--chr3, CD300A--chr17, FYB--chr5, SDS--chr12 
##     IGSF6--chr16, HCK--chr20, CSF1R--chr5, FCGR2A--chr1, NCKAP1L--chr12, C1QA--chr1, EVI2B--chr17, IFI30--chr19, PLEK--chr2, CYBB--chrX
ElbowPlot(pancreas, ndims = 30)

pancreas <- FindNeighbors(pancreas, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
pancreas <- FindClusters(pancreas, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2461
## Number of edges: 85833
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9559
## Number of communities: 9
## Elapsed time: 0 seconds
pancreas <- RunTSNE(pancreas, dims = 1:15)
DimPlot(pancreas, reduction = "tsne", label = TRUE, label.size = 8)

pancreas$orig.ident <- factor(sub("-.*", "", pancreas$orig.ident))
DimPlot(pancreas, reduction = "tsne", group.by = "orig.ident", label.size = 6)

## Marker Detection

markers <- FindAllMarkers(pancreas, only.pos=TRUE, logfc.threshold=0.25, min.pct=0.25)
## Calculating cluster 0
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
markers %>% group_by(cluster) %>% slice_max(n = 10, order_by = avg_log2FC)
## # A tibble: 90 × 7
## # Groups:   cluster [9]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene          
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>         
##  1 1.32e-239       5.42 0.645 0.043 2.34e-235 0       TMEM236--chr10
##  2 3.28e-227       5.28 0.754 0.186 5.83e-223 0       PLCE1--chr10  
##  3 5.97e-298       5.20 0.844 0.166 1.06e-293 0       LOXL4--chr10  
##  4 7.27e-271       5.05 0.706 0.048 1.29e-266 0       PTPRT--chr20  
##  5 0               5.05 0.909 0.136 0         0       IRX2--chr5    
##  6 3.56e-195       4.84 0.678 0.149 6.33e-191 0       KLHL41--chr2  
##  7 5.67e-201       4.62 0.6   0.061 1.01e-196 0       DPP4--chr2    
##  8 3.04e-296       4.53 0.862 0.196 5.40e-292 0       CRYBA2--chr2  
##  9 1.75e-186       4.52 0.548 0.041 3.12e-182 0       SPOCK3--chr4  
## 10 0               4.51 1     1     0         0       GCG--chr2     
## # ℹ 80 more rows

Cluster Annotation

new.labels <- c(
  "alpha", "beta", "acinar", "ductal",
  "delta", "PP", "mesenchymal",
  "endothelial_cells", "macrophages"
)
names(new.labels) <- levels(pancreas)
pancreas <- RenameIdents(pancreas, new.labels)
DimPlot(pancreas, reduction = "tsne", label = TRUE, label.size = 6)

Clean Gene Names

old_genes <- rownames(pancreas)
clean_genes <- sapply(old_genes, function(x) strsplit(x, split = "--|__")[[1]][1])
rownames(pancreas) <- clean_genes
head(rownames(pancreas), 20)
##  [1] "A1BG-AS1" "A1BG"     "A1CF"     "A2M-AS1"  "A2ML1"    "A2M"     
##  [7] "A4GALT"   "AAAS"     "AACS"     "AADAC"    "AADAT"    "AAED1"   
## [13] "AAGAB"    "AAK1"     "AAMDC"    "AAMP"     "AAR2"     "AARD"    
## [19] "AARS2"    "AARS"

Feature Plots

FeaturePlot(
  pancreas,
  features = c("RGS4","PFKFB2","FRZB","CHN2"),
  reduction = "tsne",
  cols = c("lightgray","coral","yellow","lightgreen"),
  pt.size = 1
)
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

FeaturePlot(
  pancreas,
  features = c("GHRL","PHGR1","VTN","ANXA13"),
  reduction = "tsne",
  cols = c("lightgray","coral"),
  pt.size = 1
)

GO Enrichment for Epsilon Cells

genes_epsilon <- c("GHRL","ANXA13","PHGR1","ACSL1","FRZB","SPTSSB","ASGR1","HEPACAM2","VTN","SERPINA1")
entrez_epsilon <- bitr(genes_epsilon, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)$ENTREZID
## 'select()' returned 1:1 mapping between keys and columns
ego_mf <- enrichGO(
  gene = entrez_epsilon,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = "MF",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2
)
dotplot(ego_mf) + ggtitle("GO:MF enrichment – epsilon cell genes")

GO Enrichment for Immune Cells

genes_immune <- c("FPR3","C1QA","CCL3","C1QC","LILRB2","MS4A6A","TREM2","RNASE6","OLR1","MPEG1")
entrez_immune <- bitr(genes_immune, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)$ENTREZID
## 'select()' returned 1:1 mapping between keys and columns
ego_mf_immune <- enrichGO(
  gene = entrez_immune,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = "MF",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2
)
dotplot(ego_mf_immune) + ggtitle("GO:MF enrichment – immune cell genes")

Sub-clustering Major Populations

expr_data <- read.delim(
  "GSE85241_dataset.csv.gz",
  header = TRUE,
  row.names = 1,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

pancreas <- CreateSeuratObject(
  expr_data,
  project = "MuraroPancreas",
  min.cells = 3,
  min.features = 500
)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Data is of class data.frame. Coercing to dgCMatrix.
pancreas <- NormalizeData(pancreas)
## Normalizing layer: counts
pancreas <- FindVariableFeatures(pancreas, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
length(VariableFeatures(pancreas))
## [1] 2000
pancreas <- ScaleData(pancreas)
## Centering and scaling data matrix
pancreas <- RunPCA(pancreas, features = VariableFeatures(pancreas))
## PC_ 1 
## Positive:  IFITM3--chr11, ZFP36L1--chr14, REST--chr4, MYH9--chr22, S100A11--chr1, SOX4--chr6, NOTCH2--chr1, MYL12A--chr18, PMEPA1--chr20, LGALS3--chr14 
##     RBPMS--chr8, CAV2--chr7, YAP1--chr11, TMSB4X--chrX, EPS8--chr12, CLIC4--chr1, ANO6--chr12, PPIC--chr5, AHNAK--chr11, NFIB--chr9 
##     TPM1--chr15, MSN--chrX, ANXA2P2--chr9, CDC42EP1--chr22, CD44--chr11, RHOC--chr1, ANXA2--chr15, LITAF--chr16, OSMR--chr5, LAMC1--chr1 
## Negative:  SCG3--chr15, SYT7--chr11, ABCC8--chr11, ERO1LB--chr1, VGF--chr7, GAD2--chr10, MIR7-3HG--chr19, PLCXD3--chr5, LOC728392--chr17, KIF5C--chr2 
##     MLXIPL--chr7, G6PC2--chr2, STMN2--chr8, STX1A--chr7, PEG10--chr7, RGS4--chr1, CFC1--chr2, WNT4--chr1, SCD--chr10, RASD1--chr17 
##     ELAVL4--chr1, C14orf132--chr14, PRUNE2--chr9, SYT13--chr11, KCNK16--chr6, PCP4--chr21, CRYBA2--chr2, ATP2A3--chr17, SORL1--chr11, MEIS2--chr15 
## PC_ 2 
## Positive:  CD24--chrY, ELF3--chr1, TACSTD2--chr1, KRT8--chr12, KIAA1522--chr1, CFB--chr6, KRT18--chr12, ANXA4--chr2, KRT7--chr12, CLDN4--chr7 
##     SERPINA3--chr14, OCLN--chr5, PDZK1IP1--chr1, RASEF--chr9, CLDN1--chr3, TMC5--chr16, TC2N--chr14, SDC4--chr20, TMPRSS2--chr21, ABCC3--chr17 
##     ERBB3--chr12, CXADR--chr21, LAD1--chr1, LCN2--chr9, TM4SF1--chr3, GATM--chr15, CLMN--chr14, ATP10B--chr5, CLDN10--chr13, NR5A2--chr1 
## Negative:  PDGFRB--chr5, SPARC--chr5, MRC2--chr17, COL6A3--chr2, CDH11--chr16, COL5A2--chr2, NID1--chr1, COL5A1--chr9, COL5A3--chr19, LAMA4--chr6 
##     COL15A1--chr9, FBN1--chr15, COL1A2--chr7, LOXL2--chr8, COL6A2--chr21, COL3A1--chr2, BGN--chrX, LUM--chr12, SFRP2--chr4, LTBP2--chr14 
##     ITGA11--chr15, SRPX2--chrX, THBS2--chr6, PRRX1--chr1, COL12A1--chr6, WNT5A--chr3, VCAN--chr5, LHFP--chr13, CYGB--chr17, EDNRA--chr4 
## PC_ 3 
## Positive:  CPA2--chr7, PRSS3P2--chr7, PRSS1--chr7, PLA2G1B--chr12, PNLIP--chr10, CTRC--chr1, KLK1--chr19, CPA1--chr7, GSTA2--chr6, CTRB1--chr16 
##     PRSS3--chr9, CTRB2--chr16, BCAT1--chr12, PNLIPRP2--chr10, REG1B--chr2, CEL--chr9, CPB1--chr3, ALB--chr4, CELA2A--chr1, AOX1--chr2 
##     SPINK1--chr5, PNLIPRP1--chr10, CELA3A--chr1, ALDOB--chr9, FAM129A--chr1, CBS--chr21, GSTA1--chr6, REG1A--chr2, MGST1--chr12, CTRL--chr16 
## Negative:  CFTR--chr7, ONECUT2--chr18, DCDC2--chr6, ALDH1A3--chr15, VTCN1--chr1, TINAGL1--chr1, AQP1--chr7, KRT19--chr17, SPP1--chr4, DEFB1--chr8 
##     KRT23--chr17, PDGFD--chr11, APCDD1--chr18, SLC4A4--chr4, PPARGC1A--chr4, PKHD1--chr6, THSD4--chr15, HSD17B2--chr16, SERPINA5--chr14, FUT3--chr19 
##     S100A14--chr1, BICC1--chr10, CRIM1--chr2, TFPI2--chr7, SCNN1A--chr12, IGFBP7--chr4, SLC3A1--chr2, CEACAM7--chr19, SLC1A1--chr9, HKDC1--chr10 
## PC_ 4 
## Positive:  CALD1--chr7, NOTCH3--chr19, COL5A1--chr9, CDH11--chr16, SFRP2--chr4, FAT1--chr4, COL5A2--chr2, COL6A1--chr21, LTBP2--chr14, ITGA11--chr15 
##     THBS2--chr6, PDGFRB--chr5, COL3A1--chr2, COL5A3--chr19, COL1A1--chr17, COL6A3--chr2, LUM--chr12, TNFAIP6--chr2, FN1--chr2, PRRX1--chr1 
##     LAMA2--chr6, COL12A1--chr6, ADAMTS2--chr5, SPON2--chr4, FMOD--chr1, EDNRA--chr4, COL6A2--chr21, GFPT2--chr5, PDE3A--chr12, COL1A2--chr7 
## Negative:  ABI3--chr17, PECAM1--chr17, SRGN--chr10, LAPTM5--chr1, GIMAP4--chr7, GMFG--chr19, ARHGDIB--chr12, TYROBP--chr19, CD53--chr1, KDR--chr4 
##     FLT1--chr13, ACVRL1--chr12, CLEC2B--chr12, CD93--chr20, ESAM--chr11, SOX18--chr20, EXOC3L2--chr19, ARHGAP31--chr3, FCER1G--chr1, LCP1--chr13 
##     TNFRSF1B--chr1, CALCRL--chr2, ELTD1--chr1, PLVAP--chr19, TIE1--chr1, CSF2RB--chr22, GIMAP8--chr7, GIMAP7--chr7, PRDM1--chr6, ERG--chr21 
## PC_ 5 
## Positive:  FLT1--chr13, ESAM--chr11, KDR--chr4, EXOC3L2--chr19, SOX18--chr20, ELTD1--chr1, PLVAP--chr19, CD93--chr20, CALCRL--chr2, GPR4--chr19 
##     PODXL--chr7, NOTCH4--chr6, LOC100505495--chr19, EMCN--chr4, RHOJ--chr14, ACVRL1--chr12, MYCT1--chr6, ECSCR--chr5, TIE1--chr1, CLEC14A--chr14 
##     PTPRB--chr12, MMRN2--chr10, ERG--chr21, GPR116--chr6, S1PR1--chr1, ROBO4--chr11, PASK--chr2, FLT4--chr5, COL13A1--chr10, CDH5--chr16 
## Negative:  TYROBP--chr19, CD53--chr1, LCP1--chr13, FCER1G--chr1, ITGB2--chr21, MS4A7--chr11, LAPTM5--chr1, C1QB--chr1, MYO1F--chr19, C1QC--chr1 
##     PTPRC--chr1, PLA2G7--chr6, RUNX3--chr1, RGS1--chr1, ACP5--chr19, ALOX5AP--chr13, CCR1--chr3, CD300A--chr17, FYB--chr5, SDS--chr12 
##     IGSF6--chr16, HCK--chr20, CSF1R--chr5, FCGR2A--chr1, NCKAP1L--chr12, C1QA--chr1, EVI2B--chr17, IFI30--chr19, PLEK--chr2, CYBB--chrX
ElbowPlot(pancreas, ndims = 30)

pancreas <- FindNeighbors(pancreas, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
pancreas <- FindClusters(pancreas, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2461
## Number of edges: 85833
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9559
## Number of communities: 9
## Elapsed time: 0 seconds
pancreas <- RunTSNE(pancreas, dims = 1:15)

new.labels <- c(
  "alpha", "beta", "acinar", "ductal",
  "delta", "PP", "mesenchymal",
  "endothelial_cells", "macrophages"
)
names(new.labels) <- levels(pancreas)
pancreas <- RenameIdents(pancreas, new.labels)
alpha.sub <- subset(pancreas, idents = "alpha")

alpha.sub <- NormalizeData(alpha.sub)
## Normalizing layer: counts
alpha.sub <- FindVariableFeatures(alpha.sub, selection.method = "vst", nfeatures = 1000)
## Finding variable features for layer counts
all.genes.alpha <- rownames(alpha.sub)
alpha.sub <- ScaleData(alpha.sub, features = all.genes.alpha)
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for
## scale.data
alpha.sub <- RunPCA(alpha.sub, features = VariableFeatures(alpha.sub), npcs = 30)
## PC_ 1 
## Positive:  CPE--chr4, CHGB--chr20, SCG2--chr2, CHGA--chr14, ALDH1A1--chr9, KIAA1244--chr6, PAM--chr5, GNAS--chr20, GCH1--chr14, VGF--chr7 
##     ENPP2--chr8, KCTD12--chr13, NEUROD1--chr2, MAGED1--chrX, CNIH2--chr11, SMARCA1--chrX, RGS4--chr1, CRYBA2--chr2, SURF4--chr9, SERPINE2--chr2 
##     ERO1LB--chr1, MUC13--chr3, GPR56--chr16, TMED3--chr15, PDK4--chr7, FEV--chr2, LOXL4--chr10, MIR7-3HG--chr19, ABCC8--chr11, PLCXD3--chr5 
## Negative:  SRGN--chr10, RGS1--chr1, KIT--chr4, ARHGDIB--chr12, CPA3--chr3, ALOX5AP--chr13, LAPTM5--chr1, LCP1--chr13, SAMSN1--chr21, RGS10--chr10 
##     GATA2--chr3, RAB27B--chr18, LPCAT2--chr16, FCER1G--chr1, RAC2--chr22, LCP2--chr5, TPSB2--chr16, IL4R--chr16, CD37--chr19, HPGDS--chr4 
##     PRKCB--chr16, IKZF1--chr7, CSF2RB--chr22, TPSAB1--chr16, TESPA1--chr12, SMIM3--chr5, PTGS1--chr9, CD22--chr19, BTK--chrX, ANXA1--chr9 
## PC_ 2 
## Positive:  PRC1--chr15, MKI67--chr10, TOP2A--chr17, ZWINT--chr10, TPX2--chr20, CIT--chr12, PBK--chr8, KIF4A--chrX, NCAPH--chr2, DIAPH3--chr13 
##     UBE2C--chr20, NUSAP1--chr15, CKAP2L--chr2, CENPF--chr1, ESCO2--chr8, CDCA2--chr8, TACC3--chr4, FOXM1--chr12, MAD2L1--chr4, CDCA8--chr1 
##     CCNB2--chr15, IQGAP3--chr1, BUB1B--chr15, ANLN--chr7, PTTG1--chr5, KIF23--chr15, SKA3--chr13, AURKB--chr17, NCAPG--chr4, SAPCD2--chr9 
## Negative:  SST--chr3, PPY--chr17, PLA2G1B--chr12, IAPP--chr12, CPA1--chr7, CEL--chr9, RBP4--chr10, MTRNR2L2--chr5, CPB1--chr3, CLPS--chr6 
##     CELA3B--chr1, GSTA1--chr6, CELA3A--chr1, CTRB1--chr16, CELA2A--chr1, PRSS3P2--chr7, TIMP2--chr17, PNLIPRP1--chr10, CTRB2--chr16, ALB--chr4 
##     INS--chr11, GP2--chr16, JUN--chr1, PRSS1--chr7, TACSTD2--chr1, CPA2--chr7, CELA2B--chr1, PNLIPRP2--chr10, DCN--chr12, RNASE1--chr14 
## PC_ 3 
## Positive:  CPE--chr4, SCG2--chr2, PAM--chr5, GCH1--chr14, KIAA1244--chr6, CHGB--chr20, ALDH1A1--chr9, HSD17B12--chr11, CNIH2--chr11, PDK4--chr7 
##     SURF4--chr9, ENPP2--chr8, VGF--chr7, MAGED1--chrX, HSP90B1--chr12, LOC728392--chr17, GPR56--chr16, TUBA1B--chr12, MUC13--chr3, LAPTM4B--chr8 
##     GNAS--chr20, CTSB--chr8, XBP1--chr22, KCTD12--chr13, SMOC1--chr14, TMEM236--chr10, LOXL4--chr10, PDE3B--chr11, ERO1LB--chr1, CAMK2G--chr10 
## Negative:  PLA2G1B--chr12, CTRB2--chr16, PRSS1--chr7, CPA1--chr7, PNLIP--chr10, CTRB1--chr16, REG1B--chr2, CPB1--chr3, REG1A--chr2, CPA2--chr7 
##     PRSS3P2--chr7, SPINK1--chr5, CLPS--chr6, CEL--chr9, CELA3A--chr1, IFITM3--chr11, PNLIPRP2--chr10, SERPINA3--chr14, CELA3B--chr1, CELA2A--chr1 
##     GSTA2--chr6, PNLIPRP1--chr10, GSTA1--chr6, DUOX2--chr15, TACSTD2--chr1, ZFP36L1--chr14, REG3A--chr2, KLK1--chr19, RNASE1--chr14, ALB--chr4 
## PC_ 4 
## Positive:  SST--chr3, PCSK1N--chrX, RBP4--chr10, KCNK17--chr6, G6PC2--chr2, C10orf10--chr10, LSAMP--chr3, PPY--chr17, SGOL1--chr3, CHGA--chr14 
##     NUSAP1--chr15, KIAA1524--chr3, POU2F2--chr19, NEUROD1--chr2, TOP2A--chr17, PRR11--chr17, ARRDC4--chr15, TNFSF10--chr3, RAD51AP1--chr12, PRC1--chr15 
##     POLQ--chr3, CKAP2L--chr2, SPAG5--chr17, MS4A2--chr11, ANLN--chr7, AURKB--chr17, BIRC5--chr17, SGOL2--chr2, KIF23--chr15, CTTNBP2--chr7 
## Negative:  REG1B--chr2, REG1A--chr2, PRSS1--chr7, PNLIP--chr10, SPINK1--chr5, CPA2--chr7, REG3A--chr2, CTRB2--chr16, PRSS3P2--chr7, PLA2G1B--chr12 
##     CPB1--chr3, SERPINA3--chr14, CELA3A--chr1, CPA1--chr7, SAT1--chrX, ACTG1--chr17, FTL--chr19, CTRB1--chr16, CD24--chrY, CLPS--chr6 
##     SQSTM1--chr5, CEL--chr9, FNDC3B--chr3, HLA-B--chr6, ANXA2--chr15, GSTA2--chr6, PNLIPRP2--chr10, PRSS3--chr9, GDF15--chr19, ACTB--chr7 
## PC_ 5 
## Positive:  PPY--chr17, PDK4--chr7, ANXA2--chr15, IRS2--chr13, C19orf10--chr19, IAPP--chr12, LSAMP--chr3, LDB2--chr4, RGS9--chr17, PEMT--chr17 
##     RPS4Y1--chrY, ADCY2--chr5, H1FX--chr3, LINC00473--chr6, DNER--chr2, ASB4--chr7, PPP1R15A--chr19, KIAA1644--chr22, DOK6--chr18, TNFRSF12A--chr16 
##     EDN3--chr20, VIM--chr10, PEG10--chr7, CD36--chr7, LMNA--chr1, LINC00348--chr13, RUNX1T1--chr8, MAP2--chr2, IGSF21--chr1, MYOM1--chr18 
## Negative:  PCP4--chr21, ARRDC4--chr15, REG3A--chr2, HLA-B--chr6, CECR1--chr22, TXNIP--chr1, ASPH--chr8, G6PC2--chr2, PBXIP1--chr1, SERPINI1--chr3 
##     PLEKHB1--chr11, C10orf10--chr10, NEUROD1--chr2, CHGA--chr14, STMN1--chr1, CD24--chrY, JUNB--chr19, DUSP1--chr5, PYROXD2--chr10, BACE2--chr21 
##     SMARCA1--chrX, COL1A1--chr17, FKBP11--chr12, SNHG5--chr6, FAM84A--chr2, METTL7A--chr12, ERO1LB--chr1, UCP2--chr11, ALDH1A1--chr9, GGH--chr8
## Warning: Number of dimensions changing from 50 to 30
ElbowPlot(alpha.sub, ndims = 30)

dims.alpha <- 1:30

alpha.sub <- FindNeighbors(alpha.sub, dims = dims.alpha)
## Computing nearest neighbor graph
## Computing SNN
alpha.sub <- FindClusters(alpha.sub, resolution = 0.2)   
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 932
## Number of edges: 38345
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8884
## Number of communities: 4
## Elapsed time: 0 seconds
alpha.sub <- RunUMAP(alpha.sub, dims = dims.alpha)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:07:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:07:07 Read 932 rows and found 30 numeric columns
## 17:07:07 Using Annoy for neighbor search, n_neighbors = 30
## 17:07:07 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:07:07 Writing NN index file to temp file /var/folders/c0/fhbpbpvd589539lj_whv4sc40000gn/T//Rtmprwi0No/file22e622809f11
## 17:07:07 Searching Annoy index using 1 thread, search_k = 3000
## 17:07:07 Annoy recall = 100%
## 17:07:08 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:07:08 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:07:08 Commencing optimization for 500 epochs, with 35398 positive edges
## 17:07:08 Using rng type: pcg
## 17:07:09 Optimization finished
DimPlot(alpha.sub, reduction = "umap", label = TRUE, pt.size = 0.5, label.size = 6) + ggtitle("Sub-clustering: alpha - cells")

markers_a <- FindAllMarkers(alpha.sub, only.pos=TRUE, logfc.threshold=0.25, min.pct=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
markers_a %>% group_by(cluster) %>% slice_max(n = 10, order_by = avg_log2FC)
## # A tibble: 40 × 7
## # Groups:   cluster [4]
##       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene           
##       <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>          
##  1 9.48e-51       3.82 0.473 0.026  1.69e-46 0       XIST--chrX     
##  2 1.50e-54       3.79 0.488 0.024  2.67e-50 0       CYP4F24P--chr19
##  3 3.12e-51       2.21 0.616 0.142  5.55e-47 0       GSTT1--chr22   
##  4 1.15e-18       1.77 0.398 0.145  2.04e-14 0       NSG1--chr4     
##  5 5.38e-10       1.76 0.255 0.102  9.57e- 6 0       TENM2--chr5    
##  6 3.18e-31       1.46 0.722 0.386  5.65e-27 0       PCP4--chr21    
##  7 1.48e-20       1.40 0.535 0.268  2.63e-16 0       TSPAN33--chr7  
##  8 2.96e- 5       1.31 0.482 0.363  5.27e- 1 0       AGT--chr1      
##  9 2.97e-21       1.31 0.604 0.346  5.28e-17 0       HSPA1B--chr6   
## 10 3.63e-16       1.30 0.633 0.431  6.45e-12 0       SERPINI1--chr3 
## # ℹ 30 more rows
beta.sub <- subset(pancreas, idents = "beta")

beta.sub <- NormalizeData(beta.sub)
## Normalizing layer: counts
beta.sub <- FindVariableFeatures(beta.sub, selection.method = "vst", nfeatures = 1000)
## Finding variable features for layer counts
all.genes.beta <- rownames(beta.sub)
beta.sub <- ScaleData(beta.sub, features = all.genes.beta)
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for
## scale.data
beta.sub <- RunPCA(beta.sub, features = VariableFeatures(beta.sub), npcs = 30)
## PC_ 1 
## Positive:  SRXN1--chr20, FTL--chr19, DDIT3--chr12, TRIB3--chr20, FAM129A--chr1, PSAT1--chr9, HSPH1--chr13, GDF15--chr19, HSPA1A--chr6, RPS4Y1--chrY 
##     ATF3--chr1, HSPA1B--chr6, ADM--chr11, WEE1--chr11, PPP1R15A--chr19, GADD45A--chr1, C7orf53--chr7, HSPA6--chr1, HMOX1--chr22, CHAC1--chr15 
##     CDKN1A--chr6, LOC339524--chr1, DDIT4--chr10, BHLHE40--chr3, HNRNPU-AS1--chr1, STK17A--chr7, HSPA5--chr9, AMY2A--chr1, AKR1C1--chr10, NEB--chr2 
## Negative:  TM4SF4--chr3, FAP--chr2, IRX2--chr5, ARX--chrX, GPR64--chrX, GC--chr4, FABP5--chr8, KCNJ3--chr2, FSTL5--chr4, SMOC1--chr14 
##     FXYD3--chr19, PCSK2--chr20, POPDC3--chr6, CD36--chr7, ABCG2--chr4, BTG2--chr1, TMEM176B--chr7, KLHL41--chr2, RGS4--chr1, GCG--chr2 
##     MUC13--chr3, PTGER3--chr1, SLC38A4--chr12, MYO10--chr5, NPNT--chr4, TMEM176A--chr7, MAGED1--chrX, CHGB--chr20, FEV--chr2, PTPRT--chr20 
## PC_ 2 
## Positive:  REG3A--chr2, REG1A--chr2, REG1B--chr2, SPINK1--chr5, PRSS1--chr7, GDF15--chr19, PNLIP--chr10, OLFM4--chr13, CTRB2--chr16, GADD45A--chr1 
##     SERPINA3--chr14, DDIT3--chr12, CPB1--chr3, HSPA1B--chr6, ELL2--chr5, TRIB3--chr20, HLA-B--chr6, ATF3--chr1, CXCL2--chr4, FAM129A--chr1 
##     ANXA4--chr2, TACSTD2--chr1, CFB--chr6, CPA2--chr7, PPP1R15A--chr19, DUOX2--chr15, CD44--chr11, PLA2G1B--chr12, CTRC--chr1, CPA1--chr7 
## Negative:  PPY--chr17, PCSK1N--chrX, KCNQ1OT1--chr11, MEG3--chr14, CNIH2--chr11, SST--chr3, CPE--chr4, SLC25A34--chr1, MAB21L3--chr1, PCSK2--chr20 
##     EDN3--chr20, MLXIPL--chr7, IAPP--chr12, EGR1--chr5, NOL4--chr18, LOC643406--chr20, LOC100131257--chr7, SCG3--chr15, UGDH-AS1--chr4, PEMT--chr17 
##     C19orf77--chr19, ABCC9--chr12, HLA-DQB1--chr6, LOC90834--chr22, FOS--chr14, DHRS2--chr14, LOC100335030--chr12, M1--chr10, IKZF3--chr17, CSRNP3--chr2 
## PC_ 3 
## Positive:  SCG3--chr15, CHGA--chr14, MAN1A1--chr6, REG3A--chr2, PAM--chr5, ITGAV--chr2, PCP4--chr21, B2M--chr15, RASD1--chr17, VGF--chr7 
##     DHRS2--chr14, GSN--chr9, AQP3--chr9, TSPAN1--chr1, IER3--chr6, NPTX2--chr7, MAGED1--chrX, ITM2C--chr2, JUNB--chr19, EXPH5--chr11 
##     REG1A--chr2, MIR7-3HG--chr19, SCG2--chr2, SHISA6--chr17, ID1--chr20, LRRTM3--chr10, FOS--chr14, GRIN2A--chr16, PAPSS2--chr10, GPM6A--chr4 
## Negative:  GC--chr4, FABP5--chr8, ARX--chrX, IRX2--chr5, TMEM236--chr10, TM4SF4--chr3, LOXL4--chr10, FABP5P3--chr7, POPDC3--chr6, FAP--chr2 
##     SPOCK3--chr4, DDIT3--chr12, PPY--chr17, LOC643406--chr20, TRIB3--chr20, KLHL41--chr2, GCG--chr2, ABCG2--chr4, FEV--chr2, PTGER3--chr1 
##     CD36--chr7, PTPRT--chr20, CNIH3--chr1, PYROXD2--chr10, GATA6--chr18, CRYM--chr16, MAOB--chrX, GPR64--chrX, MUC13--chr3, KCNQ1OT1--chr11 
## PC_ 4 
## Positive:  PPY--chr17, LOC643406--chr20, KCNQ1OT1--chr11, MAB21L3--chr1, PCSK1N--chrX, RPL29--chr3, UGDH-AS1--chr4, TLCD2--chr17, ABCC9--chr12, RPS4Y1--chrY 
##     CTRB2--chr16, CLPS--chr6, REG1B--chr2, LOC100131257--chr7, C21orf62--chr21, LSAMP--chr3, PLA2G1B--chr12, VSTM4--chr10, SIX3--chr2, RAMP2-AS1--chr17 
##     TMEM212--chr3, CELA2A--chr1, CPA1--chr7, LOC100190986--chr16, LOC100216546--chr7, KIAA1919--chr6, SPINK1--chr5, PRSS1--chr7, CEACAM22P--chr19, SLC31A1--chr9 
## Negative:  CHGA--chr14, SPINT2--chr19, CECR1--chr22, RBP4--chr10, TTR--chr18, GCH1--chr14, XIST--chrX, SPOCK3--chr4, B2M--chr15, ALDH1A1--chr9 
##     HLA-C--chr6, PYROXD2--chr10, HLA-B--chr6, VGF--chr7, FXYD6--chr11, NPTX2--chr7, GPM6A--chr4, LOXL4--chr10, FABP5P3--chr7, POPDC3--chr6 
##     FAP--chr2, ABCG2--chr4, CRTAC1--chr10, FABP5--chr8, PTGER3--chr1, ENPP2--chr8, PLS3--chrX, CRYM--chr16, FXYD3--chr19, SMARCA1--chrX 
## PC_ 5 
## Positive:  PNLIP--chr10, SPINK1--chr5, PLA2G1B--chr12, CTRB2--chr16, REG1B--chr2, REG1A--chr2, TTR--chr18, PRSS1--chr7, CPA1--chr7, PRSS3P2--chr7 
##     CEL--chr9, GCG--chr2, CPA2--chr7, CELA3A--chr1, CTRB1--chr16, CXCL2--chr4, AHNAK--chr11, SCG5--chr15, IGFBP7--chr4, CTRC--chr1 
##     RPL29--chr3, AMY2A--chr1, FGL1--chr8, PCSK2--chr20, IMPA2--chr18, VIM--chr10, KCNA5--chr12, MGST1--chr12, CD44--chr11, CLPS--chr6 
## Negative:  HSPA1B--chr6, VMP1--chr17, GADD45A--chr1, TRIB3--chr20, HSPA5--chr9, ADM--chr11, HSPA1A--chr6, PPP1R15A--chr19, HSP90B1--chr12, CDKN1A--chr6 
##     XIST--chrX, HSPH1--chr13, DDIT3--chr12, DUSP4--chr8, ATF3--chr1, FBXW7--chr4, BCL6--chr3, TMSB4X--chrX, GADD45B--chr19, NEFM--chr8 
##     RGS16--chr1, FAM129A--chr1, ELL2--chr5, GEM--chr8, HMOX1--chr22, WEE1--chr11, AKAP12--chr6, GDF15--chr19, FBXO32--chr8, EDA2R--chrX
## Warning: Number of dimensions changing from 50 to 30
ElbowPlot(beta.sub, ndims = 30)

dims.beta <- 1:30

beta.sub <- FindNeighbors(beta.sub, dims = dims.beta)
## Computing nearest neighbor graph
## Computing SNN
beta.sub <- FindClusters(beta.sub, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 492
## Number of edges: 24249
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8127
## Number of communities: 4
## Elapsed time: 0 seconds
beta.sub <- RunUMAP(beta.sub, dims = dims.beta)
## 17:07:25 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:07:25 Read 492 rows and found 30 numeric columns
## 17:07:25 Using Annoy for neighbor search, n_neighbors = 30
## 17:07:25 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:07:25 Writing NN index file to temp file /var/folders/c0/fhbpbpvd589539lj_whv4sc40000gn/T//Rtmprwi0No/file22e6591f3b30
## 17:07:25 Searching Annoy index using 1 thread, search_k = 3000
## 17:07:25 Annoy recall = 100%
## 17:07:26 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:07:26 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:07:26 Commencing optimization for 500 epochs, with 18168 positive edges
## 17:07:26 Using rng type: pcg
## 17:07:27 Optimization finished
DimPlot(beta.sub, reduction = "umap", label = TRUE, pt.size = 0.5, , label.size = 6) + ggtitle("Sub-clustering: beta - cells")

markers_b <- FindAllMarkers(beta.sub, only.pos=TRUE, logfc.threshold=0.25, min.pct=0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
markers_b %>% group_by(cluster) %>% slice_max(n = 10, order_by = avg_log2FC)
## # A tibble: 40 × 7
## # Groups:   cluster [4]
##       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene          
##       <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>         
##  1 3.06e-12       2.00 0.386 0.105  5.45e- 8 0       DUOX2--chr15  
##  2 4.57e-17       1.98 0.551 0.205  8.12e-13 0       GPM6A--chr4   
##  3 4.86e-10       1.96 0.426 0.186  8.64e- 6 0       FAM107A--chr3 
##  4 6.69e-14       1.95 0.456 0.141  1.19e- 9 0       TSPAN5--chr4  
##  5 4.21e- 8       1.89 0.412 0.205  7.48e- 4 0       SLC18A2--chr10
##  6 7.90e- 3       1.81 0.342 0.245  1   e+ 0 0       THBD--chr20   
##  7 2.84e- 7       1.79 0.526 0.305  5.05e- 3 0       AQP3--chr9    
##  8 1.69e-11       1.72 0.375 0.1    3.01e- 7 0       XIST--chrX    
##  9 7.61e- 7       1.70 0.301 0.123  1.35e- 2 0       GLT25D2--chr1 
## 10 3.53e- 6       1.64 0.25  0.091  6.28e- 2 0       ATP7B--chr13  
## # ℹ 30 more rows
old_genes <- rownames(alpha.sub)

clean_genes <- sapply(old_genes, function(x) strsplit(x, split="--|__")[[1]][1])

rownames(alpha.sub) <- clean_genes

FeaturePlot(object = alpha.sub, features  = c("XIST", "LSAMP", "DGKH", "CTRB1"), reduction = "umap", cols = c("lightgray","red"), pt.size = 1)

old_genes <- rownames(beta.sub)

clean_genes <- sapply(old_genes, function(x) strsplit(x, split="--|__")[[1]][1])

rownames(beta.sub) <- clean_genes

FeaturePlot(object = beta.sub, features  = c("DUOX2", "HLA-DQB1", "GDF15", "CRYM"), reduction = "umap", cols = c("lightgray","red"), pt.size = 1)

## Differential Expression by Sex

pancreas$donor <- factor( sub("-.*", "", pancreas$orig.ident) )

pancreas$sex <- ifelse(pancreas$donor == "D30", "F", ifelse(pancreas$donor == "D29", "M", NA) )

pancreas$sex <- factor(pancreas$sex, levels = c("M","F"))

pancreas_sex <- subset(pancreas, subset = donor %in% c("D29","D30"))

table(pancreas$donor, pancreas$sex)
##      
##         M   F
##   D28   0   0
##   D29 640   0
##   D30   0 714
##   D31   0   0
pancreas_sex$donor <- droplevels(pancreas_sex$donor)

pancreas_sex$sex   <- droplevels(pancreas_sex$sex)

levels(pancreas_sex$donor)  
## [1] "D29" "D30"
levels(pancreas_sex$sex)  
## [1] "M" "F"
pancreas_sex <- NormalizeData(pancreas_sex)
## Normalizing layer: counts
pancreas_sex <- FindVariableFeatures(pancreas_sex, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
pancreas_sex <- ScaleData(pancreas_sex, vars.to.regress = NULL)
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for
## scale.data
pancreas_sex <- RunPCA(pancreas_sex, features = VariableFeatures(pancreas_sex))
## PC_ 1 
## Positive:  PAM--chr5, SYT7--chr11, VGF--chr7, ERO1LB--chr1, ABCC8--chr11, MIR7-3HG--chr19, PLCXD3--chr5, MLXIPL--chr7, KIF5C--chr2, PEG10--chr7 
##     STMN2--chr8, RASD1--chr17, G6PC2--chr2, RGS4--chr1, CFC1--chr2, WNT4--chr1, CRYBA2--chr2, ALCAM--chr3, SCD--chr10, PRUNE2--chr9 
##     ATP2A3--chr17, SYT13--chr11, FEV--chr2, FAM105A--chr5, PCSK1--chr5, MEIS2--chr15, SORL1--chr11, PCP4--chr21, CACNA1D--chr3, KCNK17--chr6 
## Negative:  IFITM3--chr11, ZFP36L1--chr14, REST--chr4, PMEPA1--chr20, NOTCH2--chr1, COL4A2--chr13, EPS8--chr12, HK1--chr10, MYH9--chr22, MYOF--chr10 
##     MSN--chrX, MICAL2--chr11, YAP1--chr11, FLNA--chrX, CLIC4--chr1, FSTL1--chr3, PPIC--chr5, COL18A1--chr21, SOX4--chr6, S100A11--chr1 
##     LGALS3--chr14, SERPINH1--chr11, PHLDA1--chr12, CAV2--chr7, C1S--chr12, LAMC1--chr1, RBPMS--chr8, ANO6--chr12, ANXA2P2--chr9, AHNAK--chr11 
## PC_ 2 
## Positive:  PDGFRB--chr5, COL6A3--chr2, CDH11--chr16, COL5A2--chr2, COL5A1--chr9, BGN--chrX, COL6A2--chr21, MRC2--chr17, THBS2--chr6, LAMA4--chr6 
##     NID1--chr1, COL5A3--chr19, ITGA11--chr15, LOXL2--chr8, COL3A1--chr2, SPARC--chr5, COL15A1--chr9, LTBP2--chr14, FBN1--chr15, SFRP2--chr4 
##     VCAN--chr5, LUM--chr12, THY1--chr11, CYGB--chr17, COL1A2--chr7, LHFP--chr13, SRPX2--chrX, EDNRA--chr4, WNT5A--chr3, PRRX1--chr1 
## Negative:  TACSTD2--chr1, CD24--chrY, ANXA4--chr2, CLDN10--chr13, CFB--chr6, KRT7--chr12, CLDN1--chr3, LAD1--chr1, KRT8--chr12, SERPINA3--chr14 
##     ELF3--chr1, KRT18--chr12, ABCC3--chr17, KIAA1522--chr1, TM4SF1--chr3, TMC5--chr16, CFTR--chr7, RASEF--chr9, LCN2--chr9, MMP7--chr11 
##     CLDN4--chr7, SDC4--chr20, SLC4A4--chr4, MUC20--chr3, KRT19--chr17, MET--chr7, ATP10B--chr5, PDZK1IP1--chr1, TMPRSS2--chr21, ERBB3--chr12 
## PC_ 3 
## Positive:  SCD5--chr4, PDX1--chr13, HADH--chr4, SORL1--chr11, INS--chr11, SYT13--chr11, CASR--chr3, MAFA--chr8, SYNE2--chr14, NPTX2--chr7 
##     PCSK1--chr5, PFKFB2--chr1, TGFBR3--chr1, UCHL1--chr4, ADCYAP1--chr18, PRSS23--chr11, GLIS3--chr9, ITPR3--chr6, TIMP2--chr17, IAPP--chr12 
##     SLC6A17--chr1, ENTPD3--chr3, MEG3--chr14, DHRS2--chr14, PCDH7--chr4, PVRL3--chr3, LOC154761--chr7, EXPH5--chr11, FFAR4--chr10, MAPT--chr17 
## Negative:  FABP5--chr8, FAP--chr2, FEV--chr2, LOXL4--chr10, CRYBA2--chr2, MUC13--chr3, KLHL41--chr2, TMEM236--chr10, CD36--chr7, PLCE1--chr10 
##     PDK4--chr7, RGS4--chr1, LAPTM5--chr1, CD53--chr1, TYROBP--chr19, FCER1G--chr1, ABI3--chr17, SRGN--chr10, ARRDC4--chr15, MS4A7--chr11 
##     LCP1--chr13, IFI30--chr19, PYROXD2--chr10, LDHA--chr11, IGFBP2--chr2, MYO1F--chr19, C1QB--chr1, RGS1--chr1, C1QC--chr1, ARHGDIB--chr12 
## PC_ 4 
## Positive:  FAP--chr2, MUC13--chr3, RGS4--chr1, LOXL4--chr10, CRYBA2--chr2, FEV--chr2, TMEM236--chr10, KLHL41--chr2, GPC6--chr13, C15orf48--chr15 
##     PLCE1--chr10, MGST1--chr12, FKBP11--chr12, LDHA--chr11, IGFBP2--chr2, ARRDC4--chr15, PYROXD2--chr10, PRSS3--chr9, MYO10--chr5, NPNT--chr4 
##     PDK4--chr7, PPP2R2B--chr5, PTGER3--chr1, SPTSSB--chr3, AOX1--chr2, FAM84A--chr2, GSTA1--chr6, IMPA2--chr18, CBS--chr21, CERCAM--chr9 
## Negative:  ABI3--chr17, LAPTM5--chr1, TYROBP--chr19, SRGN--chr10, CD53--chr1, FCER1G--chr1, LPAR6--chr13, LCP1--chr13, TNFRSF1B--chr1, RGS1--chr1 
##     MYO1F--chr19, C1QB--chr1, C1QC--chr1, RUNX3--chr1, HCK--chr20, LCP2--chr5, PLA2G7--chr6, STAB1--chr3, PTPRC--chr1, MS4A7--chr11 
##     GMFG--chr19, HLA-DPA1--chr6, ACP5--chr19, ARHGDIB--chr12, NCKAP1L--chr12, IGSF6--chr16, ITGB2--chr21, FYB--chr5, PECAM1--chr17, HLA-DRB1--chr6 
## PC_ 5 
## Positive:  PRSS3P2--chr7, PRSS1--chr7, REG1B--chr2, CPA2--chr7, GSTA2--chr6, CTRB2--chr16, PLA2G1B--chr12, PNLIP--chr10, CPA1--chr7, CTRC--chr1 
##     ALDOB--chr9, KLK1--chr19, CELA2A--chr1, CEL--chr9, ALB--chr4, CPB1--chr3, PRSS3--chr9, CTRB1--chr16, CELA3A--chr1, BCAT1--chr12 
##     FAM129A--chr1, PNLIPRP1--chr10, REG1A--chr2, PNLIPRP2--chr10, SPINK1--chr5, LGALS2--chr22, ANPEP--chr15, MGST1--chr12, CTRL--chr16, CXCL17--chr19 
## Negative:  IGFBP7--chr4, CFTR--chr7, ALDH1A3--chr15, VTCN1--chr1, TINAGL1--chr1, AQP1--chr7, PAQR5--chr15, FUT3--chr19, SERPINA1--chr14, S100A14--chr1 
##     APCDD1--chr18, PKHD1--chr6, CEACAM7--chr19, THSD4--chr15, SEPT9--chr17, TSPAN8--chr12, TFPI2--chr7, SLC4A4--chr4, NPNT--chr4, PPARGC1A--chr4 
##     SLC3A1--chr2, PDGFD--chr11, DEFB1--chr8, SPNS2--chr17, ITGB8--chr7, DCDC2--chr6, KRT23--chr17, VCAM1--chr1, BICC1--chr10, WWTR1--chr3
ElbowPlot(pancreas_sex, ndims = 30)

pancreas_sex <- FindNeighbors(pancreas_sex, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
pancreas_sex <- FindClusters(pancreas_sex, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1354
## Number of edges: 46810
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9436
## Number of communities: 7
## Elapsed time: 0 seconds
pancreas_sex <- RunTSNE(pancreas_sex, dims = 1:15)

Idents(pancreas_sex) <- "donor"

de_all <- FindMarkers(
  object          = pancreas_sex,
  ident.1         = "D30",      
  ident.2         = "D29",      
  logfc.threshold = 0.25,
  min.pct         = 0.1
)
de_sorted_pos <- de_all[order(de_all$avg_log2FC, decreasing = TRUE), ]

de_sorted_neg <- de_all[order(de_all$avg_log2FC, decreasing = FALSE), ]

de_sig <- de_all[ de_all$p_val_adj < 0.05, ]

length(which(de_all$p_val_adj < 0.05))
## [1] 2858
genes_F <- rownames(de_sig[ de_sig$avg_log2FC > 0, ])

genes_M <- rownames(de_sig[ de_sig$avg_log2FC < 0, ])

ord_F <- order( de_sig[genes_F, "avg_log2FC"], decreasing = TRUE )

top20_F <- genes_F[ ord_F[1:10] ]

ord_M <- order( de_sig[genes_M, "avg_log2FC"], decreasing = FALSE )

top20_M <- genes_M[ ord_M[1:10] ]

top20_both <- c(top20_F, top20_M)


my_palette <- colorRampPalette(c("bisque", "coral",  "blue"))(100)

heatmap_plot <- DoHeatmap(
  object   = pancreas_sex,
  features = top20_both,
  group.by = "donor",       
  slot     = "data"          
) +
  scale_fill_gradientn(
    colors = my_palette,
    name   = "Expression\n(log norm)"
  ) +
  theme(
    axis.text.x     = element_blank(),  
    axis.ticks.x    = element_blank(),
    panel.grid      = element_blank(),
    legend.position = "right"
  )
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(heatmap_plot)

## Differential Expression by Age

pancreas$age <- ifelse(pancreas$donor == "D29", "younger",  ifelse(pancreas$donor == "D31", "older", NA) )

pancreas$age <- factor(pancreas$age, levels = c("younger","older"))

pancreas_age <- subset(pancreas, subset = donor %in% c("D29","D31"))

pancreas_age <- subset(pancreas, subset = donor %in% c("D29","D31"))


table(pancreas_age$donor, pancreas_age$age)
##      
##       younger older
##   D29     640     0
##   D31       0   735
pancreas_age$donor <- droplevels(pancreas_age$donor)
pancreas_age$age   <- droplevels(pancreas_age$age)

pancreas_age <- NormalizeData(pancreas_age)
## Normalizing layer: counts
pancreas_age <- FindVariableFeatures(pancreas_age, selection.method = "vst", nfeatures = 2000)
## Finding variable features for layer counts
pancreas_age <- ScaleData(pancreas_age, vars.to.regress = NULL)
## Centering and scaling data matrix
## Warning: Different features in new layer data than already exists for
## scale.data
pancreas_age <- RunPCA(pancreas_age, features = VariableFeatures(pancreas_age))
## PC_ 1 
## Positive:  CHGB--chr20, SCG2--chr2, PCSK1N--chrX, PAM--chr5, ABCC8--chr11, VGF--chr7, GAD2--chr10, CNIH2--chr11, ERO1LB--chr1, STMN2--chr8 
##     G6PC2--chr2, PEG10--chr7, RGS4--chr1, PCDH17--chr13, TMEM176A--chr7, SCD--chr10, HEPACAM2--chr7, SLC38A4--chr12, ARX--chrX, CFC1--chr2 
##     GPX3--chr5, CRYBA2--chr2, TM4SF4--chr3, PDE3B--chr11, SEZ6L--chr22, UCHL1--chr4, GC--chr4, SST--chr3, PEMT--chr17, FEV--chr2 
## Negative:  IFITM3--chr11, ZFP36L1--chr14, CAV2--chr7, REST--chr4, SOX4--chr6, NFIB--chr9, TACSTD2--chr1, PMEPA1--chr20, ANXA4--chr2, LGALS3--chr14 
##     S100A11--chr1, CD44--chr11, RBPMS--chr8, MYH9--chr22, CDC42EP1--chr22, YAP1--chr11, MYL12A--chr18, CLDN1--chr3, KRT7--chr12, TPM1--chr15 
##     IFITM2--chr11, TMSB4X--chrX, SMAD3--chr15, NOTCH2--chr1, SEL1L3--chr4, MYOF--chr10, AHNAK--chr11, OSMR--chr5, ANXA2P2--chr9, SH3BP4--chr2 
## PC_ 2 
## Positive:  CD24--chrY, CDH1--chr16, TACSTD2--chr1, ELF3--chr1, KRT8--chr12, CFB--chr6, KRT18--chr12, PDZK1IP1--chr1, GATM--chr15, TC2N--chr14 
##     ANXA4--chr2, B3GNT7--chr2, KIAA1522--chr1, RASEF--chr9, SERPINA3--chr14, OCLN--chr5, CLDN4--chr7, CLDN10--chr13, KRT7--chr12, CLMN--chr14 
##     ABCC3--chr17, TMPRSS2--chr21, SDC4--chr20, RAB11FIP1--chr8, CLDN1--chr3, CXADR--chr21, AKR1C3--chr10, LAD1--chr1, MUC1--chr1, TMC5--chr16 
## Negative:  SPARC--chr5, NID1--chr1, COL6A3--chr2, COL5A3--chr19, COL15A1--chr9, BGN--chrX, PDGFRB--chr5, LAMA4--chr6, MRC2--chr17, FBN1--chr15 
##     COL5A1--chr9, CDH11--chr16, COL5A2--chr2, PXDN--chr2, COL1A2--chr7, LOXL2--chr8, TBX3--chr12, COL3A1--chr2, CYGB--chr17, SFRP2--chr4 
##     LUM--chr12, LTBP2--chr14, COL4A1--chr13, COL6A2--chr21, SERPINE1--chr7, IGFBP4--chr17, WNT5A--chr3, CTHRC1--chr8, SRPX2--chrX, PRRX1--chr1 
## PC_ 3 
## Positive:  CPA2--chr7, PNLIP--chr10, CTRC--chr1, PLA2G1B--chr12, PRSS3P2--chr7, BCAT1--chr12, PRSS1--chr7, PNLIPRP2--chr10, REG1B--chr2, GSTA2--chr6 
##     KLK1--chr19, CTRB1--chr16, CTRB2--chr16, CPA1--chr7, PRSS3--chr9, CELA2A--chr1, CEL--chr9, CPB1--chr3, ALB--chr4, ALDOB--chr9 
##     CTRL--chr16, PNLIPRP1--chr10, SPINK1--chr5, CELA3A--chr1, REG1A--chr2, CELA2B--chr1, FAM129A--chr1, GP2--chr16, AOX1--chr2, CBS--chr21 
## Negative:  CFTR--chr7, KRT19--chr17, ALDH1A3--chr15, DCDC2--chr6, VTCN1--chr1, TINAGL1--chr1, AQP1--chr7, ONECUT2--chr18, SPP1--chr4, IGFBP7--chr4 
##     APCDD1--chr18, MMP7--chr11, HSD17B2--chr16, BICC1--chr10, CRIM1--chr2, ANXA3--chr4, TFPI2--chr7, THSD4--chr15, S100A10--chr1, KRT23--chr17 
##     DEFB1--chr8, CMTM7--chr3, S100A14--chr1, PFKP--chr10, KRT80--chr12, SERPINA5--chr14, PPARGC1A--chr4, LGALS4--chr19, PKHD1--chr6, HKDC1--chr10 
## PC_ 4 
## Positive:  COL6A3--chr2, COL5A1--chr9, COL1A2--chr7, PDGFRB--chr5, SFRP2--chr4, COL5A3--chr19, BGN--chrX, COL3A1--chr2, CDH11--chr16, COL1A1--chr17 
##     LUM--chr12, LTBP2--chr14, NOTCH3--chr19, FN1--chr2, GFPT2--chr5, THBS2--chr6, COL6A2--chr21, COL5A2--chr2, TNFAIP6--chr2, COL12A1--chr6 
##     PRRX1--chr1, COL6A1--chr21, WNT5A--chr3, ITGA11--chr15, CYGB--chr17, EDNRA--chr4, LAMC3--chr9, ZNF469--chr16, PDGFRA--chr4, CRLF1--chr19 
## Negative:  FLT1--chr13, KDR--chr4, PECAM1--chr17, ELTD1--chr1, EXOC3L2--chr19, ESAM--chr11, CALCRL--chr2, ACVRL1--chr12, CD93--chr20, PLVAP--chr19 
##     SOX18--chr20, ERG--chr21, GPR4--chr19, PTPRB--chr12, EMCN--chr4, ECSCR--chr5, PODXL--chr7, RHOJ--chr14, TIE1--chr1, COL13A1--chr10 
##     RGCC--chr13, SMAD6--chr15, LOC100505495--chr19, S1PR1--chr1, NKX2-3--chr10, ABI3--chr17, GPR116--chr6, ROBO4--chr11, FLT4--chr5, MYCT1--chr6 
## PC_ 5 
## Positive:  HADH--chr4, SCD5--chr4, PDX1--chr13, INS--chr11, NPTX2--chr7, MAFA--chr8, SYT13--chr11, TGFBR3--chr1, ADCYAP1--chr18, SORL1--chr11 
##     TIMP2--chr17, CASR--chr3, PFKFB2--chr1, PCSK1--chr5, UCHL1--chr4, MEG3--chr14, IAPP--chr12, RBP4--chr10, ENTPD3--chr3, MAPT--chr17 
##     FFAR4--chr10, GLIS3--chr9, WSCD2--chr12, LOC154761--chr7, BHLHE41--chr12, PRSS23--chr11, SRXN1--chr20, SMAD9--chr13, DLK1--chr14, PRUNE2--chr9 
## Negative:  TM4SF4--chr3, GC--chr4, IRX2--chr5, FAP--chr2, KCTD12--chr13, LOXL4--chr10, ARX--chrX, FEV--chr2, RGS4--chr1, CRYBA2--chr2 
##     MUC13--chr3, TMEM176A--chr7, FABP5--chr8, PTPRT--chr20, FSTL5--chr4, SERPINE2--chr2, PDK4--chr7, PLCE1--chr10, SMOC1--chr14, TMEM236--chr10 
##     SLC38A4--chr12, KLHL41--chr2, MYO10--chr5, NPNT--chr4, GPR64--chrX, GPX3--chr5, PAPPA2--chr1, PPP2R2B--chr5, LDB2--chr4, HMGB3--chrX
ElbowPlot(pancreas_age, ndims = 30)

pancreas_age <- FindNeighbors(pancreas_age, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
pancreas_age <- FindClusters(pancreas_age, resolution = 0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1375
## Number of edges: 47286
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9514
## Number of communities: 7
## Elapsed time: 0 seconds
pancreas_age <- RunTSNE(pancreas_age, dims = 1:15)

Idents(pancreas_age) <- "donor"

de_all <- FindMarkers(
  object          = pancreas_age,
  ident.1         = "D29",      
  ident.2         = "D31",      
  logfc.threshold = 0.25,
  min.pct         = 0.1
)
de_sorted_pos <- de_all[order(de_all$avg_log2FC, decreasing = TRUE), ]

de_sorted_neg <- de_all[order(de_all$avg_log2FC, decreasing = FALSE), ]

de_sig <- de_all[ de_all$p_val_adj < 0.05, ]

length(which(de_all$p_val_adj < 0.05))
## [1] 2428
genes_F <- rownames(de_sig[ de_sig$avg_log2FC > 0, ])

genes_M <- rownames(de_sig[ de_sig$avg_log2FC < 0, ])

ord_F <- order( de_sig[genes_F, "avg_log2FC"], decreasing = TRUE )

top20_F <- genes_F[ ord_F[1:10] ]

ord_M <- order( de_sig[genes_M, "avg_log2FC"], decreasing = FALSE )
top20_M <- genes_M[ ord_M[1:10] ]

top20_both <- c(top20_F, top20_M)


my_palette <- colorRampPalette(c("bisque", "coral",  "blue"))(100)

heatmap_plot <- DoHeatmap(
  object   = pancreas_age,
  features = top20_both,
  group.by = "donor",       
  slot     = "data"          
) +
  scale_fill_gradientn(
    colors = my_palette,
    name   = "Expression\n(log norm)"
  ) +
  theme(
    axis.text.x     = element_blank(),  
    axis.ticks.x    = element_blank(),
    panel.grid      = element_blank(),
    legend.position = "right"
  )
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
print(heatmap_plot)

## UMAP

pancreas <- FindNeighbors(pancreas, dims = 1:17)
## Computing nearest neighbor graph
## Computing SNN
pancreas <- FindClusters(pancreas, resolution = 0.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2461
## Number of edges: 85784
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8881
## Number of communities: 12
## Elapsed time: 0 seconds
table(Idents(pancreas))
## 
##   0   1   2   3   4   5   6   7   8   9  10  11 
## 570 411 311 260 230 198 143 110  81  75  50  22
pancreas <- RunUMAP(pancreas, dims = 1:17)
## 17:08:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:08:53 Read 2461 rows and found 17 numeric columns
## 17:08:53 Using Annoy for neighbor search, n_neighbors = 30
## 17:08:53 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:08:54 Writing NN index file to temp file /var/folders/c0/fhbpbpvd589539lj_whv4sc40000gn/T//Rtmprwi0No/file22e67eddfe4
## 17:08:54 Searching Annoy index using 1 thread, search_k = 3000
## 17:08:54 Annoy recall = 100%
## 17:08:54 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:08:55 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:08:55 Commencing optimization for 500 epochs, with 100918 positive edges
## 17:08:55 Using rng type: pcg
## 17:08:57 Optimization finished
DimPlot(pancreas, reduction = "umap", label = TRUE)

old_genes <- rownames(pancreas)

clean_genes <- sapply(old_genes, function(x) strsplit(x, split="--|__")[[1]][1])

rownames(pancreas) <- clean_genes
FeaturePlot(
  object = pancreas, features  = c("GHRL", "PHGR1", "VTN", "ANXA13"), reduction = "umap", cols = c("lightgray","red"), pt.size = 1)